{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "58b1517b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.impute import SimpleImputer\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.pipeline import Pipeline, make_pipeline\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
    "from sklearn.linear_model import SGDRegressor\n",
    "from custom_transformer import DictToDFTransformer\n",
    "import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "137b49fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data = pd.read_csv(\"train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7f091668",
   "metadata": {},
   "outputs": [],
   "source": [
    "features = [\n",
    "    \"MSZoning\",\n",
    "    \"LotArea\",\n",
    "    \"LotShape\",\n",
    "    \"Utilities\",\n",
    "    \"YrSold\",\n",
    "    \"Neighborhood\",\n",
    "    \"OverallQual\",\n",
    "    \"YearBuilt\",\n",
    "    \"SaleType\",\n",
    "    \"GarageArea\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "66be31ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = train_data[features]\n",
    "y = np.log1p(train_data[\"SalePrice\"])\n",
    "categorical_features = df.select_dtypes(object)\n",
    "numerical_features = df.select_dtypes(exclude=object)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "23eae04b",
   "metadata": {},
   "outputs": [],
   "source": [
    "p = Pipeline(\n",
    "    [\n",
    "        (\"dicttodf\", DictToDFTransformer()),\n",
    "        (\n",
    "            \"preprocess\",\n",
    "            ColumnTransformer(\n",
    "                [\n",
    "                    (\n",
    "                        \"numerical\",\n",
    "                        make_pipeline(\n",
    "                            SimpleImputer(strategy=\"mean\"),\n",
    "                            StandardScaler(),\n",
    "                        ),\n",
    "                        sorted(numerical_features.columns),\n",
    "                    ),\n",
    "                    (\n",
    "                        \"categorical\",\n",
    "                        make_pipeline(\n",
    "                            SimpleImputer(strategy=\"most_frequent\"),\n",
    "                            OneHotEncoder(handle_unknown=\"ignore\", sparse=False),\n",
    "                        ),\n",
    "                        sorted(categorical_features.columns),\n",
    "                    ),\n",
    "                ]\n",
    "            ),\n",
    "        ),\n",
    "        (\"regressor\", SGDRegressor(random_state=666, learning_rate=\"adaptive\")),\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3f6a55b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = p.fit(df, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c198c543",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(steps=[('dicttodf',\n",
       "                 <custom_transformer.DictToDFTransformer object at 0x7fa5120c8e20>),\n",
       "                ('preprocess',\n",
       "                 ColumnTransformer(transformers=[('numerical',\n",
       "                                                  Pipeline(steps=[('simpleimputer',\n",
       "                                                                   SimpleImputer()),\n",
       "                                                                  ('standardscaler',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ['GarageArea', 'LotArea',\n",
       "                                                   'OverallQual', 'YearBuilt',\n",
       "                                                   'YrSold']),\n",
       "                                                 ('categorical',\n",
       "                                                  Pipeline(steps=[('simpleimputer',\n",
       "                                                                   SimpleImputer(strategy='most_frequent')),\n",
       "                                                                  ('onehotencoder',\n",
       "                                                                   OneHotEncoder(handle_unknown='ignore',\n",
       "                                                                                 sparse=False))]),\n",
       "                                                  ['LotShape', 'MSZoning',\n",
       "                                                   'Neighborhood', 'SaleType',\n",
       "                                                   'Utilities'])])),\n",
       "                ('regressor',\n",
       "                 SGDRegressor(learning_rate='adaptive', random_state=666))])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5a96ee5e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['model.joblib']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joblib.dump(model, \"model.joblib\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f3cdeb3f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['GarageArea',\n",
       " 'LotArea',\n",
       " 'OverallQual',\n",
       " 'YearBuilt',\n",
       " 'YrSold',\n",
       " 'LotShape',\n",
       " 'MSZoning',\n",
       " 'Neighborhood',\n",
       " 'SaleType',\n",
       " 'Utilities']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cols = sorted(numerical_features.columns) + sorted(categorical_features.columns)\n",
    "cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c2093efa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'GarageArea': 548,\n",
       "  'LotArea': 8450,\n",
       "  'OverallQual': 7,\n",
       "  'YearBuilt': 2003,\n",
       "  'YrSold': 2008,\n",
       "  'LotShape': 'Reg',\n",
       "  'MSZoning': 'RL',\n",
       "  'Neighborhood': 'CollgCr',\n",
       "  'SaleType': 'WD',\n",
       "  'Utilities': 'AllPub'}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_request = df[cols].head(1).to_dict(\"records\")\n",
    "sample_request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b3d18862",
   "metadata": {},
   "outputs": [],
   "source": [
    "request = [\n",
    "    {\n",
    "        \"MSZoning\": \"RL\",\n",
    "        \"LotArea\": 8450,\n",
    "        \"LotShape\": \"Reg\",\n",
    "        \"Utilities\": \"AllPub\",\n",
    "        \"YrSold\": 2008,\n",
    "        \"Neighborhood\": \"CollgCr\",\n",
    "        \"OverallQual\": 7,\n",
    "        \"YearBuilt\": 2003,\n",
    "        \"SaleType\": \"WD\",\n",
    "        \"GarageArea\": 548,\n",
    "    }\n",
    "]\n",
    "response = model.predict(sample_request)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "3462c4e7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([12.20283282])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0d82a16",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
